library(tidyverse)
library(lubridate)
library(haven)
library(stargazer)
library(rdrobust)

#The author uses version 30.1 of the British Election Study Internet Panel, Wave 22.
bes_w22 <- read_dta("BES2019_W22_v30.1.dta") 

#COVID Dataset
covid_new_cases <- read.csv("daily-new-confirmed-covid-19-cases-per-million-people.csv")

covid_new_cases <- covid_new_cases |> 
  mutate(
    date=as_date(Day)
  )

#Data Cleaning and Merge
bes_w22 <- bes_w22 |> mutate(date=as_date(starttime))
bes_w22 <- bes_w22 |>
  mutate(treatment=ifelse(date>="2021-12-07",1L,0L),
         treatment_plus1day=ifelse(date>="2021-12-08",1L,0L), #For Appendix J
         initial_report=case_when( #For Appendix
           date<="2021-11-29"~"Pre_Initial_Report",
           date>="2021-11-30"&date<="2021-12-06"~"Post_Initial_Report") |>  
           factor(levels = c("Pre_Initial_Report","Post_Initial_Report")),
         date_diff=difftime(date,as_date("2021-12-07"),units = "days"),
         date_diff_initial=      #For Appendix M
           difftime(date,as_date("2021-11-30"),units = "days"), 
         convote=ifelse(generalElectionVote==1,1L,0L),
         labvote=ifelse(generalElectionVote==2,1L,0L),
         ctr_gender=as_factor(gender),
         ctr_education=as_factor(p_edlevel),
         ctr_income=as_factor(p_gross_personal),
         ctr_ethnicity=as_factor(p_ethnicity),
         ctr_disability=as_factor(p_disability),
         ctr_unemployed=ifelse(workingStatus==4,1L,0L) |> as_factor())
bes_w22_covid <- left_join(bes_w22,covid_new_cases,by="date") 

# Data Subset for Plots (Used in the analyses of vote choice)
bes22rdd_likecon <- bes_w22 |> filter(likeCon<=10)
bes22rdd_con_likecon <- bes22rdd_likecon |> filter(partyId==1)
bes22rdd_lab_likecon <- bes22rdd_likecon |> filter(partyId==2)
bes22rdd_ind_likecon <- bes22rdd_likecon |> filter(partyId==10)

# Data Subset for Regression Analyses
df_main_base <- bes_w22_covid|> filter(date_diff>=-7,date_diff<=6)
df_main_reg_con <- df_main_base |> filter(partyId==1)
df_main_reg_lab <- df_main_base |> filter(partyId==2)
df_main_reg_ind <- df_main_base |> filter(partyId==10)

#Data subset: Conservative Favorability
df_main_reg_con_likecon <- df_main_reg_con |> filter(likeCon<=10)
df_main_reg_lab_likecon <- df_main_reg_lab |> filter(likeCon<=10)
df_main_reg_ind_likecon <- df_main_reg_ind |> filter(likeCon<=10)

#Data subset: Boris Johnson Favorability
df_main_reg_con_likejohnson <- df_main_reg_con |> filter(likeJohnson<=10)
df_main_reg_lab_likejohnson <- df_main_reg_lab |> filter(likeJohnson<=10)
df_main_reg_ind_likejohnson <- df_main_reg_ind |> filter(likeJohnson<=10)

#Data subset: COVID-19 Performance
df_main_reg_con_covid <- df_main_reg_con |> filter(handleCorona<=10)
df_main_reg_lab_covid <- df_main_reg_lab |> filter(handleCorona<=10)
df_main_reg_ind_covid <- df_main_reg_ind |> filter(handleCorona<=10)

#Data subset: Lockdown Performance
df_main_reg_con_lockdown <- df_main_reg_con |> filter(govtHandlelockdown<=10)
df_main_reg_lab_lockdown <- df_main_reg_lab |> filter(govtHandlelockdown<=10)
df_main_reg_ind_lockdown <- df_main_reg_ind |> filter(govtHandlelockdown<=10)


# Figure 1 ----------------------------------------------------------------

# Construct RD data
make_rd_df <- function(y, x) {
  p <- rdplot(y, as.numeric(x), ci = 95)
  p$vars_bins |>
    select(rdplot_mean_x, rdplot_mean_y, rdplot_se_y) |>
    mutate(
      ll = rdplot_mean_y - 1.96 * rdplot_se_y,
      ul = rdplot_mean_y + 1.96 * rdplot_se_y,
      Treatment = factor(ifelse(rdplot_mean_x >= 0, 1L, 0L))
    )
}

#RD plot function
plot_rd <- function(df, ylim, ylab, label_y) {
  ggplot(df) +
    geom_point(aes(rdplot_mean_x, rdplot_mean_y), size = 4) +
    geom_vline(xintercept = -0.5, linetype = 1) +
    geom_smooth(
      aes(rdplot_mean_x, rdplot_mean_y,
          color = Treatment, linetype = Treatment),
      method = "lm", se = FALSE
    ) +
    geom_vline(xintercept = -7.5, linetype = 2) +
    geom_label(aes(x = -8, y = label_y, label = "First Coverage"), size = 8) +
    geom_label(aes(x = 0,  y = label_y, label = "Video Released"), size = 8) +
    coord_cartesian(ylim = ylim) +
    labs(x = "Date Difference", y = ylab) +
    theme_light() +
    theme(
      text = element_text(size = 28),
      panel.grid = element_blank()
    )
}

# Full Sample (Figure 1a)
df_all <- make_rd_df(
  bes22rdd_likecon$likeCon,
  bes22rdd_likecon$date_diff
)
g_p1_all <- plot_rd(
  df_all,
  ylim = c(2.5, 4.5),
  ylab = "Conservative Party FT",
  label_y = 4.4
)
g_p1_all
# Conservative Supporters (Figure 1b)

df_con <- make_rd_df(
  bes22rdd_con_likecon$likeCon,
  bes22rdd_con_likecon$date_diff
)
g_p1_con <- plot_rd(
  df_con,
  ylim = c(6, 8),
  ylab = "Conservative Party FT",
  label_y = 7.9
)
g_p1_con
# Labour Supporters (Figure 1c)

df_lab <- make_rd_df(
  bes22rdd_lab_likecon$likeCon,
  bes22rdd_lab_likecon$date_diff
)
g_p1_lab <- plot_rd(
  df_lab,
  ylim = c(0.5, 2.5),
  ylab = "Conservative Party FT",
  label_y = 2.4
)
g_p1_lab

# Independents (Figire 1d)
df_ind <- make_rd_df(
  bes22rdd_ind_likecon$likeCon,
  bes22rdd_ind_likecon$date_diff
)
g_p1_ind <- plot_rd(
  df_ind,
  ylim = c(1.8, 3.8),
  ylab = "Conservative Party FT",
  label_y = 3.7
)
g_p1_ind

# Main Analyses -----------------------------------------------------------
#Model
fml <- ~ treatment + age + ctr_gender + ctr_education +
  ctr_income + ctr_unemployed+ ctr_ethnicity + ctr_disability+
  daily_new_confirmed_per_million

run_lm <- function(dv, pid, data, bound = NULL) {
  d <- data |>
    filter(date_diff >= -7, date_diff <= 6, partyId == pid)
  
  if (!is.null(bound) && !is.na(bound)) {
    d <- d |> filter(.data[[dv]] <= bound)
  }
  m <- lm(reformulate(attr(terms(fml), "term.labels"), dv), data = d)
  c(
    est = coef(summary(m))[2,1],
    std = coef(summary(m))[2,2]
  )
}


specs <- tribble(
  ~dv, ~bound,
  "likeCon", 10,
  "likeJohnson", 10,
  "handleCorona", 10,
  "govtHandlelockdown", 10,
  "convote", NA,
  "labvote", NA
)

parties <- c(Cons = 1, Lab = 2, Ind = 10)

#Labels
dv_labels <- c(
  likeCon = "Like Cons (0-10)",
  likeJohnson = "Like Johnson (0-10)",
  handleCorona = "COVID-19 Performance (1-5)",
  govtHandlelockdown = "Lockdown Performance (1-5)",
  convote = "Vote Intention: Con (0-1)",
  labvote = "Vote Intention: Lab (0-1)"
)

# Run linear models for each outcome–party specification,
# store estimation results in a tidy data frame,
# and compute 95% and 90% confidence intervals
df_scandal <- expand_grid(specs, party = names(parties)) |>
  mutate(
    res = map2(
      dv, party,
      ~ run_lm(.x, parties[.y], bes_w22_covid,
               bound = specs$bound[specs$dv == .x])
    )
  ) |>
  unnest_wider(res) |>
  mutate(
    Party = factor(party, levels = c("Cons","Lab","Ind")),
    DV = factor(dv, levels = names(dv_labels), labels = dv_labels),
    ul  = est + 1.96 * std,
    ll  = est - 1.96 * std,
    ul90 = est + 1.645 * std,
    ll90 = est - 1.645 * std
  )

#Function for Figure 2
plot_block <- function(df) {
  ggplot(df) +
    geom_linerange(aes(x = Party, ymin = ll90, ymax = ul90),
                   linewidth = 2, color = "black") +
    geom_pointrange(aes(x = Party, y = est, ymin = ll, ymax = ul),
                    size = 2, linewidth = 1, color = "black") +
    facet_wrap(~ DV) +
    geom_hline(yintercept = 0, linetype = 2) +
    labs(y = "Effect of Partygate Video") +
    theme_light() +
    theme(
      text = element_text(size = 30),
      strip.text = element_text(size = 26, color = "black"),
      strip.background = element_rect(fill = "grey90"),
      panel.grid = element_blank()
    )
}

#Figure 2
#Figure 2a
est_covid <- plot_block(
  df_scandal |>
    filter(dv %in% c("handleCorona", "govtHandlelockdown"))
)
est_covid

#Figure 2b
est_like <- plot_block(
  df_scandal |>
    filter(dv %in% c("likeCon", "likeJohnson"))
)
est_like

#Figure 2c
est_vote <- plot_block(
  df_scandal |>
    filter(dv %in% c("convote", "labvote"))
)
est_vote

ggsave("est_covid.eps",est_covid,width = 12,height = 6,units=c("in"),
       dpi = 1200)
ggsave("est_like.eps",est_like,width = 12,height = 6,units=c("in"),
       dpi = 1200)
ggsave("est_vote.eps",est_vote,width = 12,height = 6,units=c("in"),
       dpi = 1200)


#Function that runs models
run_lm_obj <- function(dv, pid, data, bound = NULL) {
  d <- data |>
    filter(date_diff >= -7, date_diff <= 6, partyId == pid)
  
  if (!is.null(bound) && !is.na(bound)) {
    d <- d |> filter(.data[[dv]] <= bound)
  }
  
  lm(reformulate(attr(terms(fml), "term.labels"), dv), data = d)
}

#Run models
lm_list <- expand_grid(specs, party = names(parties)) |>
  mutate(
    model = map2(
      dv, party,
      ~ run_lm_obj(
        dv = .x,
        pid = parties[.y],
        data = bes_w22_covid,
        bound = specs$bound[specs$dv == .x]
      )
    )
  )

#Tables for latex (Appendix G)
#COVID Performance
stargazer(lm_list$model[[7]],lm_list$model[[8]],lm_list$model[[9]],
          keep = "treatment")

#Lockdown Performance
stargazer(lm_list$model[[10]],lm_list$model[[11]],lm_list$model[[12]],
          keep = "treatment")

#Conservative Favorability
stargazer(lm_list$model[[1]],lm_list$model[[2]],lm_list$model[[3]],
          keep = "treatment")

#Johnson Favorability
stargazer(lm_list$model[[4]],lm_list$model[[5]],lm_list$model[[6]],
          keep = "treatment")

#Vote Intention: Conservative Party
stargazer(lm_list$model[[13]],lm_list$model[[14]],lm_list$model[[15]],
          keep = "treatment")

#Vote Intention: Labour Party
stargazer(lm_list$model[[16]],lm_list$model[[17]],lm_list$model[[18]],
          keep = "treatment")
